# Import the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# Read the dataset that the rest of this analysis works on.
# NOTE: "Unemployment in India.csv" used to be loaded into the same `data`
# variable on the line before and was overwritten immediately, so it was never
# used. That dead load is removed; only this file is actually analysed.
data = pd.read_csv("Unemployment_Rate_upto_11_2020 (1).csv")
# Preview the first five rows.
data.head(5)
| | Region | Date | Frequency | Estimated Unemployment Rate (%) | Estimated Employed | Estimated Labour Participation Rate (%) | Region.1 | longitude | latitude |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Andhra Pradesh | 31-01-2020 | M | 5.48 | 16635535 | 41.02 | South | 15.9129 | 79.74 |
| 1 | Andhra Pradesh | 29-02-2020 | M | 5.83 | 16545652 | 40.90 | South | 15.9129 | 79.74 |
| 2 | Andhra Pradesh | 31-03-2020 | M | 5.79 | 15881197 | 39.18 | South | 15.9129 | 79.74 |
| 3 | Andhra Pradesh | 30-04-2020 | M | 20.51 | 11336911 | 33.10 | South | 15.9129 | 79.74 |
| 4 | Andhra Pradesh | 31-05-2020 | M | 17.43 | 12988845 | 36.46 | South | 15.9129 | 79.74 |
# Count the missing values in each column of the dataset.
data.isna().sum()
Region 0 Date 0 Frequency 0 Estimated Unemployment Rate (%) 0 Estimated Employed 0 Estimated Labour Participation Rate (%) 0 Region.1 0 longitude 0 latitude 0 dtype: int64
# Rename the columns positionally: the first column becomes 'States', the
# seventh becomes 'Region', and the "(%)" suffixes are dropped.
data.columns = [
    'States',
    'Date',
    'Frequency',
    'Estimated Unemployment Rate',
    'Estimated Employed',
    'Estimated Labour Participation Rate',
    'Region',
    'longitude',
    'latitude',
]
# Preview the first five rows with the new column names.
data.head(5)
| | States | Date | Frequency | Estimated Unemployment Rate | Estimated Employed | Estimated Labour Participation Rate | Region | longitude | latitude |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Andhra Pradesh | 31-01-2020 | M | 5.48 | 16635535 | 41.02 | South | 15.9129 | 79.74 |
| 1 | Andhra Pradesh | 29-02-2020 | M | 5.83 | 16545652 | 40.90 | South | 15.9129 | 79.74 |
| 2 | Andhra Pradesh | 31-03-2020 | M | 5.79 | 15881197 | 39.18 | South | 15.9129 | 79.74 |
| 3 | Andhra Pradesh | 30-04-2020 | M | 20.51 | 11336911 | 33.10 | South | 15.9129 | 79.74 |
| 4 | Andhra Pradesh | 31-05-2020 | M | 17.43 | 12988845 | 36.46 | South | 15.9129 | 79.74 |
# Check how many rows and columns the dataset has, as a (rows, columns) tuple.
data.shape
(267, 9)
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()
| | Estimated Unemployment Rate | Estimated Employed | Estimated Labour Participation Rate | longitude | latitude |
|---|---|---|---|---|---|
| count | 267.000000 | 2.670000e+02 | 267.000000 | 267.000000 | 267.000000 |
| mean | 12.236929 | 1.396211e+07 | 41.681573 | 22.826048 | 80.532425 |
| std | 10.803283 | 1.336632e+07 | 7.845419 | 6.270731 | 5.831738 |
| min | 0.500000 | 1.175420e+05 | 16.770000 | 10.850500 | 71.192400 |
| 25% | 4.845000 | 2.838930e+06 | 37.265000 | 18.112400 | 76.085600 |
| 50% | 9.650000 | 9.732417e+06 | 40.390000 | 23.610200 | 79.019300 |
| 75% | 16.755000 | 2.187869e+07 | 44.055000 | 27.278400 | 85.279900 |
| max | 75.850000 | 5.943376e+07 | 69.690000 | 33.778200 | 92.937600 |
# List the distinct states that appear in the dataset.
data["States"].unique()
array(['Andhra Pradesh', 'Assam', 'Bihar', 'Chhattisgarh', 'Delhi', 'Goa',
'Gujarat', 'Haryana', 'Himachal Pradesh', 'Jammu & Kashmir',
'Jharkhand', 'Karnataka', 'Kerala', 'Madhya Pradesh',
'Maharashtra', 'Meghalaya', 'Odisha', 'Puducherry', 'Punjab',
'Rajasthan', 'Sikkim', 'Tamil Nadu', 'Telangana', 'Tripura',
'Uttar Pradesh', 'Uttarakhand', 'West Bengal'], dtype=object)
# List the distinct regions that appear in the dataset.
data["Region"].unique()
array(['South', 'Northeast', 'East', 'West', 'North'], dtype=object)
# Histogram of the "Estimated Employed" column, with bars coloured by region.
# The column names are already assigned once right after loading, so the
# identical positional re-assignment that preceded this plot was a no-op and
# has been removed.
plt.figure(figsize=(7, 6))
plt.title("Estimated Employed Rate of India")
sns.histplot(x="Estimated Employed", hue="Region", data=data)
plt.show()
# Histogram of the labour participation rate, with bars coloured by region.
plt.figure(figsize=(7, 6))
ax = sns.histplot(
    data=data,
    x="Estimated Labour Participation Rate",
    hue="Region",
)
# histplot returns the Axes it drew on; set the title through it.
ax.set_title("Estimated Labour Participation Rate based on Regions of Indian ")
plt.show()
# Interactive plotly chart of labour participation rate against state,
# coloured by region.
fig = px.histogram(
    data,
    x='Estimated Labour Participation Rate',
    y='States',
    color='Region',
)
fig.show()
# Histogram of the estimated unemployment rate, with bars coloured by region.
# The column names are already assigned once right after loading, so the
# identical positional re-assignment that preceded this plot was a no-op and
# has been removed.
plt.figure(figsize=(7, 6))
plt.title("Unemployment Rate According to Different Regions of Indian ")
sns.histplot(x="Estimated Unemployment Rate", hue="Region", data=data)
plt.show()
# Average estimated unemployment rate per state, sorted from lowest to highest
# and shown as a scatter plot with one point per state.
plot_Estimated_Unemployment = data[['Estimated Unemployment Rate', 'States']]
Estimated_Unemployment_Rate = (
    plot_Estimated_Unemployment
    .groupby('States', as_index=False)
    .mean()
    .sort_values('Estimated Unemployment Rate')
)
fig = px.scatter(
    Estimated_Unemployment_Rate,
    x='States',
    y='Estimated Unemployment Rate',
    color='States',
    title='Average Estimated Unemployment Rate in each state',
    template='plotly',
)
fig.show()
# Average estimated number of employed people per state, sorted ascending.
plot_Estimated_Employed = data[['Estimated Employed', 'States']]
Estimated_Employed = plot_Estimated_Employed.groupby('States').mean().reset_index()
Estimated_Employed = Estimated_Employed.sort_values('Estimated Employed')
# The frame already holds one averaged value per state, so a bar chart is used
# instead of a histogram: a histogram would aggregate the values again and
# label the axis "sum of Estimated Employed", contradicting the title.
fig = px.bar(
    Estimated_Employed,
    x='States',
    y='Estimated Employed',
    color='States',
    title='Average Estimated Employed in each state',
    template='plotly',
)
fig.show()